{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "QgumzOZ5wgec",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# Get started with Metrics Tracking and Monitoring"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "6eM1BMvDwgee",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "This notebook demonstrates how to use Evidently to:\n",
    "* Generate a model performance report and calculate associated metrics.\n",
    "* Log model metrics to MLFlow.\n",
    "* Store the model in MLFlow as an artifact.\n",
    "* Store the model performance report in MLFlow as an artifact."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "rByuPhg7wgei",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import joblib\n",
    "import mlflow\n",
    "import mlflow.sklearn\n",
    "from mlflow.tracking import MlflowClient\n",
    "import pandas as pd\n",
    "\n",
    "from evidently.metric_preset import RegressionPreset\n",
    "from evidently.pipeline.column_mapping import ColumnMapping\n",
    "from evidently.report import Report\n",
    "from pathlib import Path\n",
    "from sklearn import ensemble, model_selection\n",
    "\n",
    "from config import MLFLOW_TRACKING_URI, DATA_DIR, FILENAME, REPORTS_DIR"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "HiiUl3p8wgej",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "warnings.simplefilter('ignore')"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "zw5Tap_Xwgej",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## Load Data"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "VqGH1Mr6wgej",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "More information about the dataset can be found in UCI machine learning repository: https://archive.ics.uci.edu/ml/datasets/bike+sharing+dataset\n",
    "\n",
    "Acknowledgement: Fanaee-T, Hadi, and Gama, Joao, 'Event labeling combining ensemble detectors and background knowledge', Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "dKX2YV19wgek",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Download original dataset with: python src/pipelines/load_data.py \n",
    "\n",
    "raw_data = pd.read_csv(f\"../{DATA_DIR}/{FILENAME}\")\n",
    "raw_data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define column mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "_i8edS6Ewgem",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "target = 'cnt'\n",
    "prediction = 'prediction'\n",
    "numerical_features = ['temp', 'atemp', 'hum', 'windspeed', 'mnth', 'hr', 'weekday']\n",
    "categorical_features = ['season', 'holiday', 'workingday', ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "column_mapping = ColumnMapping()\n",
    "\n",
    "column_mapping.target = 'target'\n",
    "column_mapping.prediction = 'prediction'\n",
    "column_mapping.numerical_features = numerical_features\n",
    "column_mapping.categorical_features = categorical_features"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "dhZOCJZ1wgel",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## Train a Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "vTMu0TS_wgem",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "sample_data = raw_data.set_index('dteday').loc['2011-01-01 00:00:00':'2011-01-28 23:00:00'].reset_index()\n",
    "\n",
    "print(sample_data.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "dqiRpf2Swgem",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = model_selection.train_test_split(\n",
    "    sample_data[numerical_features + categorical_features],\n",
    "    sample_data[target],\n",
    "    test_size=0.3\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "PWJv-0UKwgen",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "regressor = ensemble.RandomForestRegressor(random_state = 0, n_estimators = 50)\n",
    "regressor.fit(X_train, y_train) \n",
    "\n",
    "model_path = Path('../models/model.joblib')\n",
    "joblib.dump(regressor, model_path)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {
    "id": "pRV2_SNSwgen",
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## Build the model validation report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "TZKZ_biHwgen",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# Calculate predictions \n",
    "preds_train = regressor.predict(X_train)\n",
    "preds_test = regressor.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "JcEFJgEswgeo",
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "X_train['target'] = y_train\n",
    "X_train['prediction'] = preds_train\n",
    "\n",
    "X_test['target'] = y_test\n",
    "X_test['prediction'] = preds_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "iQk44skSwgeo"
   },
   "outputs": [],
   "source": [
    "regression_performance_report = Report(metrics=[\n",
    "    RegressionPreset(),\n",
    "])\n",
    "\n",
    "regression_performance_report.run(\n",
    "    reference_data=X_train, \n",
    "    current_data=X_test,\n",
    "    column_mapping=column_mapping)\n",
    "\n",
    "regression_performance_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract Model Train Metrics from the report\n",
    "\n",
    "train_report_metrics = regression_performance_report.as_dict()\n",
    "me = train_report_metrics['metrics'][0]['result']['current']['mean_error']\n",
    "mae = train_report_metrics['metrics'][0]['result']['current'][\"mean_abs_error\"]\n",
    "\n",
    "print(me, mae)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Log Model Training and Validation Metrics"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Set up MLFlow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set up MLFlow Client\n",
    "mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)\n",
    "client = MlflowClient()\n",
    "\n",
    "print(f\"Client tracking uri: {client.tracking_uri}\")\n",
    "\n",
    "# Set experiment name\n",
    "mlflow.set_experiment('Train Model')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with mlflow.start_run() as run: \n",
    "\n",
    "    # Show newly created run metadata info\n",
    "    print(\"Experiment id: {}\".format(run.info.experiment_id))\n",
    "    print(\"Run id: {}\".format(run.info.run_id))\n",
    "    print(\"Run name: {}\".format(run.info.run_name))\n",
    "    print('MLFlow tracking uri:', mlflow.get_tracking_uri())\n",
    "    print('MLFlow artifact uri:', mlflow.get_artifact_uri())\n",
    "    run_id = run.info.run_id\n",
    "\n",
    "    # Log metrics\n",
    "    mlflow.log_metric('me', round(me, 3))\n",
    "    mlflow.log_metric('mae', round(mae, 3))\n",
    "    \n",
    "    # Log model \n",
    "    mlflow.log_artifact(model_path)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Log Model Validation reports to MLFlow "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "report_path = f\"../{REPORTS_DIR}/train_report.html\"\n",
    "regression_performance_report.save_html(report_path)\n",
    "    \n",
    "with mlflow.start_run(run_id=run_id):\n",
    "    \n",
    "    # Show the run metadata info\n",
    "    print(\"Experiment id: {}\".format(run.info.experiment_id))\n",
    "    print(\"Run id: {}\".format(run.info.run_id))\n",
    "    print(\"Run name: {}\".format(run.info.run_name))\n",
    "    \n",
    "    # Log the regression_performance_report as an artifact\n",
    "    mlflow.log_artifact(report_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
